library(dplyr)
library(tidyr)
library(ggplot2)
library(plm)
library(sandwich)
library(coefplot)
library(lmtest)
library(effects)
library(jtools)
library(sjPlot)
#install.packages("emmeans")
library(emmeans)

setwd("/Users/amyvt98/Documents/Grad School/Research/Insurrection/BJPS Submission/Final Documents for Publication")

#This is the data for the fixed effects (DiD) model
insurrectiondata<- read.csv("paneldata.csv")

#This is the data for the OLS model
olsdatasmaller<- read.csv("originaldata.csv")

#Data manipulation
insurrectiondata$ICPSRFactor<- as.factor(insurrectiondata$ICPSR)
insurrectiondata$PeriodFactor<- as.factor(insurrectiondata$Period)
insurrectiondata$ObjectorFactor<- as.factor(insurrectiondata$Objector)
insurrectiondata$BigFourFactor<- as.factor(insurrectiondata$BigFour)
insurrectiondata$LeadershipFactor<- as.factor(insurrectiondata$Leadership)
insurrectiondata$CmteChairFactor<- as.factor(insurrectiondata$CmteChairRanking)

olsdatasmaller$ICPSRFactor<- as.factor(olsdatasmaller$ICPSR)
olsdatasmaller$ObjectorFactor<- as.factor(olsdatasmaller$Objector)
olsdatasmaller$BigFourFactor<- as.factor(olsdatasmaller$BigFour2021)
olsdatasmaller$LeadershipFactor<- as.factor(olsdatasmaller$Leadership2021)

#Making the data panel data
pdata <- pdata.frame(insurrectiondata, index = c("ICPSRFactor","Cycle"))
pdata$ICPSRFactor = relevel(pdata$ICPSRFactor, ref="21701")

#Table 1 - descriptive statistics for objectors and non-objectors

#restrict to 2018 Cycle
oldcycle <- pdata %>% filter(Cycle == 2018)
#descriptive data
table(oldcycle$Objector)
mean(oldcycle$PVIAbsolute)
oldcycle %>%
  group_by(Objector) %>%
  summarise(mean_contribution = mean(ContributionThousands, na.rm = TRUE))
oldcycle %>%
  group_by(Objector) %>%
  summarise(mean_ideology = mean(NominateDim1, na.rm = TRUE))
oldcycle %>%
  group_by(Objector) %>%
  summarise(mean_pvi = mean(PVIAbsolute, na.rm = TRUE))


#Figure 1 - Aggregate Contributions to Republicans During 2022 Election Cycle~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#This is the data for Figure 1
republicansallcontributions<- read.csv("republicandata.csv")

#Data manipulation
republicansallcontributions$Leadership2021<- as.factor(republicansallcontributions$Leadership2021)
republicansallcontributions$Objector<- as.factor(republicansallcontributions$Objector)
republicansallcontributions$NSICode<- as.factor(republicansallcontributions$NSICode)
republicansallcontributions$NSICodeNumberWithCertifiers<- as.numeric(republicansallcontributions$NSICodeWithCertifiers)
republicansallcontributions$NSICodeNumber<- as.numeric(republicansallcontributions$NSICode)
republicansallcontributions$Leadership2017Factor<- as.factor(republicansallcontributions$Leadership2017)
republicansallcontributions$CommitteeRankingMember<-as.factor(republicansallcontributions$CommitteeRankingMember)
republicansallcontributions$SubcommitteeRankingMember<- as.factor(republicansallcontributions$SubcommitteeRankingMember)
republicansallcontributions<- republicansallcontributions%>%
  mutate(ContributionThousands2022 = Total2022Contributions/1000)

meancontributionsCommittee<- group_by(republicansallcontributions, Objector, CommitteeRankingMember)
meancontributionsCommittee<- summarize(meancontributionsCommittee, 
                                       mean = mean(ContributionThousands2022))
meancontributionsCommittee <- meancontributionsCommittee[-1,]
meancontributionsCommittee <- meancontributionsCommittee[-2,]

meancontributionsCommittee<- meancontributionsCommittee%>%
  mutate(PositionType = case_when(CommitteeRankingMember == 1 ~ "Committee Ranking Member"
  ))

meancontributionsLeader<- group_by(republicansallcontributions, Objector, Leadership2021)
meancontributionsLeader<- summarize(meancontributionsLeader, 
                                    mean = mean(ContributionThousands2022))
meancontributionsLeader <- meancontributionsLeader[-1,]
meancontributionsLeader <- meancontributionsLeader[-2,]

meancontributionsLeader<- meancontributionsLeader%>%
  mutate(PositionType = case_when(Leadership2021 == 1 ~ "Leadership"
  ))

meancontributionsBigFour<- group_by(republicansallcontributions, Objector, BigFour2021)
meancontributionsBigFour<- summarize(meancontributionsBigFour, 
                                     mean = mean(ContributionThousands2022))
meancontributionsBigFour <- meancontributionsBigFour[-1,]
meancontributionsBigFour <- meancontributionsBigFour[-2,]

meancontributionsBigFour<- meancontributionsBigFour%>%
  mutate(PositionType = case_when(BigFour2021 == 1 ~ "Big Four Committee"
  ))

meancontributionsAllGOP<- group_by(republicansallcontributions, Objector)
meancontributionsAllGOP<- summarize(meancontributionsAllGOP, 
                                    mean = mean(ContributionThousands2022))
meancontributionsAllGOP<- meancontributionsAllGOP%>%
  mutate(PositionType = "All Republicans")

#Making all of the dataframes the same number of columns
meancontributionsBigFour <- meancontributionsBigFour[,-2]
meancontributionsLeader <- meancontributionsLeader[,-2]
meancontributionsCommittee <- meancontributionsCommittee[,-2]

meansforplot<- rbind(meancontributionsAllGOP, meancontributionsBigFour, meancontributionsCommittee, meancontributionsLeader)

#Figure 1: 
Figure1<- ggplot(meansforplot,aes(x=PositionType,y=mean,fill=factor(Objector)))+
  geom_bar(stat="identity",position="dodge")+
  scale_fill_discrete(name="Objector",
                      breaks=c(0, 1),
                      labels=c("Non-Objector", "Objector"))+
  scale_fill_grey()+
  xlab("Position Type")+ylab("Mean Contributions from Fortune 500 PACs")+
  labs(fill = "Objector Status")
Figure1

#Table 2 - updated so that contribution totals are adjusted for inflation~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#Model 1 
olsmodel1smaller<- lm(ContributionThousands2022~ObjectorFactor + CommitteeRankingMember2021 + SubcommitteeRankingMember2021 + LeadershipFactor + BigFourFactor + Retiring2022 + NominateDifference + Seniority + CompetitiveSeat, data = olsdatasmaller)
summary(olsmodel1smaller)
sd(olsdatasmaller$ContributionThousands2022)

# Model 2 - fixed effects and using inflation adjusted data
fixedeffectsmodel<- plm(FinalInflationAdjusted ~ ObjectorFactor + PeriodFactor + ObjectorFactor:PeriodFactor + CmteChairRanking + SubCmteChairRanking + Leadership + BigFourFactor  + Retiring + NominateDifference + CompetitiveSeat + Seniority, data = pdata, index = "ICPSRFactor", model = "within")
summary(fixedeffectsmodel)

#Calculating clustered standard errors for the fixed effects model above
cluster_se<- vcovHC(fixedeffectsmodel, type = "HC0", cluster = "group")
robust_se<- coeftest(fixedeffectsmodel, vcov = cluster_se)
print(robust_se)

#Figure 2 - mean contributions to objecting and non-objecting republicans by election cycle~~~~~~~~~~~~~~~
#Figure created via Excel. For data see here:

#mean contributions inflation-adjusted per Cycle
pdata %>%
  group_by(Cycle, Objector) %>%
  summarise(mean_contribution = mean(FinalInflationAdjusted, na.rm = TRUE))

#now compute expected 2022 contributions
# Compute mean contributions for 2018 and 2020 by Objector status
trend_data <- pdata %>%
  filter(Cycle %in% c(2018, 2020)) %>%
  group_by(Cycle, Objector) %>%
  summarise(mean_contribution = mean(FinalInflationAdjusted, na.rm = TRUE)) %>%
  ungroup()
# Spread data to wide format for easier calculations
trend_wide <- trend_data %>%
  tidyr::pivot_wider(names_from = Cycle, values_from = mean_contribution, names_prefix = "Cycle_")
# Calculate the trend (difference between 2018 and 2020)
trend_wide <- trend_wide %>%
  mutate(trend = Cycle_2020 - Cycle_2018,
         expected_2022 = Cycle_2020 + trend) # Linear projection

#Placebo Test~~~~~~~~~~~~~~~

#create placebo data
pdata_placebo <- pdata %>%
  filter(Cycle != 2022) %>%  # Remove the 2022 cycle
  mutate(PeriodFactor = ifelse(Cycle == 2020, 1, 0))  # Redefine period (2020 as treatment)
pdata_placebo <- pdata.frame(as.data.frame(pdata_placebo), index = c("ICPSRFactor", "Cycle"))
# Check the structure of the new dataframe
summary(pdata_placebo)
table(pdata_placebo$Cycle)  # Should only show 2018 and 2020
#rename 2020 inflation adjusted contributions
pdata_placebo$contributions2020<-pdata_placebo$X2020.Inflation.Adjusted

#run placebo model with 2020 as treatment period
placebomodel<- plm(contributions2020 ~ ObjectorFactor + PeriodFactor + ObjectorFactor:PeriodFactor + CmteChairRanking + SubCmteChairRanking + Leadership + BigFourFactor + NominateDifference + CompetitiveSeat + Seniority, data = pdata_placebo, index = "ICPSRFactor", model = "within")
summary(placebomodel)

#print, remember: no retiring members included here
stargazer(placebomodel, title = "Placebo Test 2018/2020", dep.var.labels = "", covariate.labels = c("Period","Committee Leader","Subcommittee Leader", "Party Leadership", "Big Four Committee", "Difference from Chamber Mean Ideology", "Competitive District", "Seniority", "Objector x Period"), type = "html", out = "placebo.doc")

#new DiD with cycle interaction objecter:cycle to allow objectors and non-objectors to have different slopes before 2022

#Model 3 accounts for pre-trends (2018 to 2020)
fixedeffectsmodel2 <- plm(FinalInflationAdjusted ~
                            ObjectorFactor * PeriodFactor +  # Standard DiD
                            ObjectorFactor * as.factor(Cycle) +  # Interactions for pre-trends
                            CmteChairRanking + SubCmteChairRanking + Leadership +
                            BigFourFactor + Retiring + NominateDifference +
                            CompetitiveSeat + Seniority,
                          data = pdata,
                          index = "ICPSRFactor",
                          model = "within")
summary(fixedeffectsmodel2)

###Objector:period has an even bigger effect now after controlling for everything + pre-trend
#print
stargazer(fixedeffectsmodel2, title = "Placebo Test 2018/2020", dep.var.labels = "", covariate.labels = c("Period","Cycle","Committee Leader","Subcommittee Leader", "Party Leadership", "Big Four Committee", "Retiring", "Difference from Chamber Mean Ideology", "Competitive District", "Seniority", "Objector x Period", "Objector x Cycle"), type = "html", out = "cycleinteraction.doc")



#Table 4 - triple interaction for leadership~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#Leadership
threecyclemodelleadershipfe<- plm(FinalInflationAdjusted ~ ObjectorFactor + PeriodFactor+ LeadershipFactor + LeadershipFactor:PeriodFactor + LeadershipFactor:ObjectorFactor + LeadershipFactor:ObjectorFactor:PeriodFactor + CompetitiveSeat + NominateDifference + Retiring + BigFourFactor + CmteChairRanking + SubCmteChairRanking + Seniority, data = pdata, index = "ICPSRFactor", model = "within")
summary(threecyclemodelleadershipfe)

pdata$CycleFactor<- as.factor(pdata$CycleFactor)

#Calculating clustered standard errors for the fixed effects model above
cluster_se2<- vcovHC(threecyclemodelleadershipfe, type = "HC0", cluster = "group")
robust_se2<- coeftest(threecyclemodelleadershipfe, vcov = cluster_se2)

#Figure 3: plot for triple interaction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Extract coefficient table
coef_table <- summary(threecyclemodelleadershipfe)$coefficients

# Extract relevant coefficients
b_Leadership <- coef_table["LeadershipFactor1", "Estimate"]
b_Period <- coef_table["PeriodFactor1", "Estimate"]
b_Leadership_Period <- coef_table["PeriodFactor1:LeadershipFactor1", "Estimate"]
b_Leadership_Objector <- coef_table["ObjectorFactor1:LeadershipFactor1", "Estimate"]
b_Leadership_Period_Objector <- coef_table["ObjectorFactor1:PeriodFactor1:LeadershipFactor1", "Estimate"]

# Extract coefficients for control variables
b_CompetitiveSeat <- coef_table["CompetitiveSeat", "Estimate"]
b_NominateDifference <- coef_table["NominateDifference", "Estimate"]
b_Retiring <- coef_table["Retiring", "Estimate"]
b_BigFour <- coef_table["BigFourFactor1", "Estimate"]  # Use BigFourFactor instead of BigFourFactor1
b_CmteChairRanking <- coef_table["CmteChairRanking", "Estimate"]
b_SubCmteChairRanking <- coef_table["SubCmteChairRanking", "Estimate"]
b_Seniority <- coef_table["Seniority", "Estimate"]

# Compute linear predictions for different values of PeriodFactor1 and ObjectorFactor1
library(dplyr)

linear_predictions_leadership <- expand.grid(
  PeriodFactor1 = c(0, 1),
  ObjectorFactor1 = c(0, 1)
) %>%
  mutate(
    LeadershipFactor1 = 1,  # Compute predictions for Leadership = 1
    CompetitiveSeat = mean(pdata$CompetitiveSeat, na.rm = TRUE),
    NominateDifference = mean(pdata$NominateDifference, na.rm = TRUE),
    Retiring = mean(pdata$Retiring, na.rm = TRUE),
    BigFourFactor = mean(as.numeric(as.character(pdata$BigFourFactor)), na.rm = TRUE),
    # Fix: Use BigFourFactor instead of BigFourFactor1
    CmteChairRanking = mean(pdata$CmteChairRanking, na.rm = TRUE),
    SubCmteChairRanking = mean(pdata$SubCmteChairRanking, na.rm = TRUE),
    Seniority = mean(pdata$Seniority, na.rm = TRUE),
    
    # Compute predicted values of FinalInflationAdjusted
    Predicted_Y = b_Leadership * LeadershipFactor1 +
      b_Period * PeriodFactor1 +
      b_Leadership_Period * (LeadershipFactor1 * PeriodFactor1) +
      b_Leadership_Objector * (LeadershipFactor1 * ObjectorFactor1) +
      b_Leadership_Period_Objector * (LeadershipFactor1 * PeriodFactor1 * ObjectorFactor1) +
      b_CompetitiveSeat * CompetitiveSeat +
      b_NominateDifference * NominateDifference +
      b_Retiring * Retiring +
      b_BigFour * BigFourFactor +  # Fix: Use BigFourFactor instead of BigFourFactor1
      b_CmteChairRanking * CmteChairRanking +
      b_SubCmteChairRanking * SubCmteChairRanking +
      b_Seniority * Seniority
  )

# Print linear predictions
print(linear_predictions_leadership)

#Plot

library(ggplot2)

ggplot(linear_predictions_leadership, aes(x = PeriodFactor1, y = Predicted_Y, 
                                          group = factor(ObjectorFactor1), 
                                          color = factor(ObjectorFactor1),
                                          shape = factor(ObjectorFactor1))) +
  geom_point(size = 3) +  # Plot points
  geom_line() +  # Connect points with lines
  geom_errorbar(aes(ymin = Predicted_Y - 1.96 * sd(Predicted_Y),  # Dummy CI
                    ymax = Predicted_Y + 1.96 * sd(Predicted_Y)), 
                width = 0.2, color = "gray") +  
  labs(
    title = "Figure 3: Marginal Effects of Leadership",
    subtitle = "Conditional on Period and Objector Status",
    x = "Period",
    y = "Linear Prediction",
    color = "Objector",
    shape = "Objector"
  ) +
  theme_minimal() +
  scale_color_manual(values = c("black", "gray50")) +  # Match Stata colors
  scale_shape_manual(values = c(16, 18)) +  # Use different point shapes
  theme(legend.position = "bottom")  # Move legend to bottom


  
#Table 5: Big Four triple intraction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
threecyclemodelbigfourfe<- plm(FinalInflationAdjusted ~ ObjectorFactor + PeriodFactor+ BigFourFactor + BigFourFactor:PeriodFactor + BigFourFactor:ObjectorFactor + BigFourFactor:ObjectorFactor:PeriodFactor + CompetitiveSeat + NominateDifference + Retiring + Leadership + CmteChairRanking + SubCmteChairRanking + Seniority, data = pdata, index = "ICPSRFactor", model = "within")
summary(threecyclemodelbigfourfe)

#Calculating clustered standard errors for the fixed effects model above
cluster_se1<- vcovHC(threecyclemodelbigfourfe, type = "HC0", cluster = "group")
robust_se1<- coeftest(threecyclemodelbigfourfe, vcov = cluster_se1)
print(robust_se1)

#Figure 4 - triple interaction Big 4~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Extract coefficient table
coef_table <- summary(threecyclemodelbigfourfe)$coefficients

# Extract relevant coefficients
b_Intercept <- 0  # No explicit intercept in fixed effects models
b_BigFour <- coef_table["BigFourFactor1", "Estimate"]
b_Period <- coef_table["PeriodFactor1", "Estimate"]
b_BigFour_Period <- coef_table["PeriodFactor1:BigFourFactor1", "Estimate"]
b_BigFour_Objector <- coef_table["ObjectorFactor1:BigFourFactor1", "Estimate"]
b_BigFour_Period_Objector <- coef_table["ObjectorFactor1:PeriodFactor1:BigFourFactor1", "Estimate"]

# Add other control variables (set them to their means for baseline predictions)
b_CompetitiveSeat <- coef_table["CompetitiveSeat", "Estimate"]
b_NominateDifference <- coef_table["NominateDifference", "Estimate"]
b_Retiring <- coef_table["Retiring", "Estimate"]
b_Leadership <- coef_table["Leadership", "Estimate"]
b_CmteChairRanking <- coef_table["CmteChairRanking", "Estimate"]
b_SubCmteChairRanking <- coef_table["SubCmteChairRanking", "Estimate"]
b_Seniority <- coef_table["Seniority", "Estimate"]

# Compute linear predictions for different values of PeriodFactor1 and ObjectorFactor1
library(dplyr)

linear_predictions_df <- expand.grid(
  PeriodFactor1 = c(0, 1),
  ObjectorFactor1 = c(0, 1)
) %>%
  mutate(
    BigFourFactor1 = 1,  # We are computing predictions for BigFour = 1
    CompetitiveSeat = mean(pdata$CompetitiveSeat, na.rm = TRUE),
    NominateDifference = mean(pdata$NominateDifference, na.rm = TRUE),
    Retiring = mean(pdata$Retiring, na.rm = TRUE),
    Leadership = mean(pdata$Leadership, na.rm = TRUE),
    CmteChairRanking = mean(pdata$CmteChairRanking, na.rm = TRUE),
    SubCmteChairRanking = mean(pdata$SubCmteChairRanking, na.rm = TRUE),
    Seniority = mean(pdata$Seniority, na.rm = TRUE),
    
    # Compute the predicted values of FinalInflationAdjusted
    Predicted_Y = b_Intercept +
      b_BigFour * BigFourFactor1 +
      b_Period * PeriodFactor1 +
      b_BigFour_Period * (BigFourFactor1 * PeriodFactor1) +
      b_BigFour_Objector * (BigFourFactor1 * ObjectorFactor1) +
      b_BigFour_Period_Objector * (BigFourFactor1 * PeriodFactor1 * ObjectorFactor1) +
      b_CompetitiveSeat * CompetitiveSeat +
      b_NominateDifference * NominateDifference +
      b_Retiring * Retiring +
      b_Leadership * Leadership +
      b_CmteChairRanking * CmteChairRanking +
      b_SubCmteChairRanking * SubCmteChairRanking +
      b_Seniority * Seniority
  )

# Print linear predictions
print(linear_predictions_df)


#Plot

library(ggplot2)
library(dplyr)

# Generate linear predictions (ensure you have computed `linear_predictions_df`)
ggplot(linear_predictions_df, aes(x = PeriodFactor1, y = Predicted_Y, 
                                  group = factor(ObjectorFactor1), 
                                  color = factor(ObjectorFactor1),
                                  shape = factor(ObjectorFactor1))) +
  geom_point(size = 3) +  # Plot points
  geom_line() +  # Connect points with lines
  geom_errorbar(aes(ymin = Predicted_Y - 1.96 * sd(Predicted_Y),  # Dummy CI
                    ymax = Predicted_Y + 1.96 * sd(Predicted_Y)), 
                width = 0.2, color = "gray") +  
  labs(
    title = "Figure 4: Marginal Effects of Key Committee Membership",
    subtitle = "Conditional on Period and Objector Status",
    x = "Period",
    y = "Linear Prediction",
    color = "Objector",
    shape = "Objector"
  ) +
  theme_minimal() +
  scale_color_manual(values = c("black", "gray50")) +  # Match Stata colors
  scale_shape_manual(values = c(16, 18)) +  # Use different point shapes
  theme(legend.position = "bottom")  # Move legend to bottom


